### tf 分层学习率 ##########
"""
var_list1 = [variables from first 5 layers]
var_list2 = [the rest of variables]
opt1 = tf.train.GradientDescentOptimizer(0.00001)
opt2 = tf.train.GradientDescentOptimizer(0.0001)
grads = tf.gradients(loss, var_list1 + var_list2)
grads1 = grads[:len(var_list1)]
grads2 = grads[len(var_list1):]
tran_op1 = opt1.apply_gradients(zip(grads1, var_list1))
train_op2 = opt2.apply_gradients(zip(grads2, var_list2))
train_op = tf.group(train_op1, train_op2)
"""
